package com.kdtech.analyse.video;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.kdtech.analyse.AnalyseNews;
import com.kdtech.analyse.JSoupUtils;
import com.kdtech.analyse.ParseState;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.StringUtils;

/**
 * Parser for video detail pages of China National Radio (tv.cnr.cn).
 *
 * @author Persh
 */
public class CnrtvAnalyse implements AnalyseNews{

	/** Page title the site serves on its "page not found" page. */
	private static final String NOT_FOUND_TITLE = "中国广播网--404页面";

	/**
	 * URL shape of a video detail page, e.g.
	 * {@code http://tv.cnr.cn/<section>/201301/t20130115_123456.html}.
	 * Literal dots are escaped so that only a real {@code .html}/{@code .shtml}
	 * suffix on the {@code tv.cnr.cn} host is accepted. Compiled once because
	 * the check runs for every crawled URL.
	 */
	private static final Pattern DETAIL_PAGE_URL = Pattern.compile(
			"http://tv\\.cnr\\.cn/.*/[0-9]{6,8}/t[0-9]{8}_[0-9]*\\.(html|shtml)");

	/**
	 * Tells whether the given URL points at a video detail page of this site.
	 *
	 * @param url the URL to test; may be {@code null}
	 * @return {@code true} if the whole URL matches the detail-page pattern,
	 *         {@code false} otherwise (including for {@code null})
	 */
	public boolean isDetailPage(String url) {
		return url != null && DETAIL_PAGE_URL.matcher(url).matches();
	}

	/**
	 * Extracts the metadata of a video detail page from its downloaded HTML.
	 *
	 * @param urlMeta the crawled page; its URL and HTML are read
	 * @return {@code null} if the URL is not a detail page or no HTML was
	 *         downloaded; a {@link NewsMeta} carrying only the URL and the
	 *         {@code ERR404} update marker if the site answered with its 404
	 *         page; otherwise a {@link NewsMeta} with URL, author, title and
	 *         date filled in (content is left {@code null})
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		String url = urlMeta.getUrl();
		if (!isDetailPage(url)) {
			return null;
		}

		String html = urlMeta.getHtml();
		if (html == null) {
			// Nothing was downloaded; Jsoup.parse would throw on a null input.
			return null;
		}

		NewsMeta video = new NewsMeta();
		video.setUrl(url);

		Document doc = Jsoup.parse(html);
		String title = doc.select("title").text();
		if (NOT_FOUND_TITLE.equals(title)) {
			// The site served its "not found" page for this URL; flag it for the caller.
			video.setUpdateUrl(ParseState.ERR404.toString());
			return video;
		}

		// The publication date is taken from the text of the span.lh20 element(s).
		Long date = DateUtils.matchDate(doc.select("span.lh20").text());

		// No description/body text is extracted from this page type.
		String desc = null;

		// "来源：" is the on-page label meaning "Source:".
		video.setAuthor(JSoupUtils.matchAuthor(doc, "来源："));
		video.setTitle(StringUtils.trimSpace(title));
		video.setContent(desc);
		video.setDate(date);

		return video;
	}

	/**
	 * Not implemented for this site.
	 *
	 * @param meta the previously parsed item (ignored)
	 * @return always {@code null}
	 */
	public NewsMeta Update(NewsMeta meta) {
		return null;
	}

}
